{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the exercise data set (the models below use columns group and x1..x7).\n",
    "data_frame <- read.csv(\"exercise5_4.csv\")\n",
    "# Later cells refer to the bare column name `group`, so the frame is attached.\n",
    "# Any copy left by a previous run is detached first, so re-running this cell\n",
    "# neither stacks duplicates on the search path nor leaves stale columns visible.\n",
    "while (\"data_frame\" %in% search()) detach(\"data_frame\", character.only = TRUE)\n",
    "attach(data_frame)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Call:\n",
      "lda(group ~ x1 + x2 + x3 + x4 + x5 + x6 + x7, data = data_frame)\n",
      "\n",
      "Prior probabilities of groups:\n",
      "        1         2 \n",
      "0.3428571 0.6571429 \n",
      "\n",
      "Group means:\n",
      "        x1       x2       x3      x4        x5        x6       x7\n",
      "1 7.358333 73.66667 1.458333 6.00000 15.250000 0.1716667 49.50000\n",
      "2 7.686957 67.43478 2.043478 5.23913  6.347826 0.2156522 70.34783\n",
      "\n",
      "Coefficients of linear discriminants:\n",
      "             LD1\n",
      "x1 -1.747293e-01\n",
      "x2  1.416021e-05\n",
      "x3  2.072826e-01\n",
      "x4 -2.052228e-01\n",
      "x5 -1.935168e-01\n",
      "x6  8.395266e+00\n",
      "x7  1.917129e-02\n"
     ]
    }
   ],
   "source": [
    "library(MASS)\n",
    "# Priors proportional to the group sizes (no `prior` argument is passed)\n",
    "ld_1 <- lda(group ~ x1 + x2 + x3 + x4 + x5 + x6 + x7, data = data_frame)\n",
    "print(ld_1)\n",
    "# Prior probabilities of groups are listed in the printed output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "w = -0.17472934*x1 + 1.416e-05*x2 + 0.20728259*x3 + -0.20522277*x4 + -0.1935168*x5 + 8.395266*x6 + 0.01917129*x7"
     ]
    }
   ],
   "source": [
    "# Print the fitted linear discriminant as w = c1*x1 + ... + c7*x7.\n",
    "# Terms are joined with collapse = \" + \" so no dangling \"+\" trails the last\n",
    "# term, and the variable labels come from the coefficient names rather than\n",
    "# a hard-coded 1:7.\n",
    "ld_1_coefs <- round(ld_1$scaling[, 1], 8)\n",
    "ld_1_terms <- paste0(ld_1_coefs, \"*\", names(ld_1_coefs))\n",
    "cat(\"w =\", paste(ld_1_terms, collapse = \" + \"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Resubstitution predictions: predict() is called without new data.\n",
    "pred_1 <- predict(ld_1)\n",
    "# Flatten the prediction list into one table next to the true label.\n",
    "pred_table_1 <- data.frame(pred_1, data.group = data_frame$group)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "     \n",
       "group  1  2\n",
       "    1 11  1\n",
       "    2  1 22"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "0.0571428571428571"
      ],
      "text/latex": [
       "0.0571428571428571"
      ],
      "text/markdown": [
       "0.0571428571428571"
      ],
      "text/plain": [
       "[1] 0.05714286"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Confusion matrix: true group (rows) vs. predicted class (columns).\n",
    "table(group = data_frame$group, pred_1$class)\n",
    "# Misclassification rate: misclassified rows / all rows.\n",
    "is_wrong_1 <- pred_1$class != data_frame$group\n",
    "error11 <- pred_table_1[is_wrong_1, ]\n",
    "nrow(error11) / nrow(pred_table_1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "     \n",
       "group  1  2\n",
       "    1  8  4\n",
       "    2  2 21"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "0.171428571428571"
      ],
      "text/latex": [
       "0.171428571428571"
      ],
      "text/markdown": [
       "0.171428571428571"
      ],
      "text/plain": [
       "[1] 0.1714286"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Cross-validated fit: with CV = TRUE, lda() returns per-row `class` and\n",
    "# `posterior` components instead of a fitted model object.\n",
    "ld_b_1 <- lda(group ~ x1 + x2 + x3 + x4 + x5 + x6 + x7, data = data_frame, CV = TRUE)\n",
    "table(group = data_frame$group, ld_b_1$class)\n",
    "# Cross-validated misclassification rate.\n",
    "# The misclassified rows are taken from the CV results themselves; slicing\n",
    "# pred_table_1 here would pair them with the resubstitution class/posterior.\n",
    "cv_table_1 <- data.frame(class = ld_b_1$class, posterior = ld_b_1$posterior, data.group = data_frame$group)\n",
    "error21 <- cv_table_1[cv_table_1$class != cv_table_1$data.group, ]\n",
    "nrow(error21) / nrow(cv_table_1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Call:\n",
      "lda(group ~ x1 + x2 + x3 + x4 + x5 + x6 + x7, data = data_frame, \n",
      "    prior = c(1, 1)/2)\n",
      "\n",
      "Prior probabilities of groups:\n",
      "  1   2 \n",
      "0.5 0.5 \n",
      "\n",
      "Group means:\n",
      "        x1       x2       x3      x4        x5        x6       x7\n",
      "1 7.358333 73.66667 1.458333 6.00000 15.250000 0.1716667 49.50000\n",
      "2 7.686957 67.43478 2.043478 5.23913  6.347826 0.2156522 70.34783\n",
      "\n",
      "Coefficients of linear discriminants:\n",
      "             LD1\n",
      "x1 -1.747293e-01\n",
      "x2  1.416021e-05\n",
      "x3  2.072826e-01\n",
      "x4 -2.052228e-01\n",
      "x5 -1.935168e-01\n",
      "x6  8.395266e+00\n",
      "x7  1.917129e-02\n"
     ]
    }
   ],
   "source": [
    "# Equal prior probabilities for the two groups\n",
    "ld_2 <- lda(group ~ x1 + x2 + x3 + x4 + x5 + x6 + x7, data = data_frame, prior = c(1, 1) / 2)\n",
    "print(ld_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "w = -0.17472934*x1 + 1.416e-05*x2 + 0.20728259*x3 + -0.20522277*x4 + -0.1935168*x5 + 8.395266*x6 + 0.01917129*x7"
     ]
    }
   ],
   "source": [
    "# Print the equal-prior discriminant as w = c1*x1 + ... + c7*x7.\n",
    "# Terms are joined with collapse = \" + \" so no dangling \"+\" trails the last\n",
    "# term, and the variable labels come from the coefficient names rather than\n",
    "# a hard-coded 1:7.\n",
    "ld_2_coefs <- round(ld_2$scaling[, 1], 8)\n",
    "ld_2_terms <- paste0(ld_2_coefs, \"*\", names(ld_2_coefs))\n",
    "cat(\"w =\", paste(ld_2_terms, collapse = \" + \"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Resubstitution predictions for the equal-prior model (no new data passed).\n",
    "pred_2 <- predict(ld_2)\n",
    "# Flatten the prediction list into one table next to the true label.\n",
    "pred_table_2 <- data.frame(pred_2, data.group = data_frame$group)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "     \n",
       "group  1  2\n",
       "    1 11  1\n",
       "    2  1 22"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "0.0571428571428571"
      ],
      "text/latex": [
       "0.0571428571428571"
      ],
      "text/markdown": [
       "0.0571428571428571"
      ],
      "text/plain": [
       "[1] 0.05714286"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Confusion matrix: true group (rows) vs. predicted class (columns).\n",
    "table(group = data_frame$group, pred_2$class)\n",
    "# Misclassification rate: misclassified rows / all rows.\n",
    "is_wrong_2 <- pred_2$class != data_frame$group\n",
    "error12 <- pred_table_2[is_wrong_2, ]\n",
    "nrow(error12) / nrow(pred_table_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "     \n",
       "group  1  2\n",
       "    1 11  1\n",
       "    2  3 20"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "0.114285714285714"
      ],
      "text/latex": [
       "0.114285714285714"
      ],
      "text/markdown": [
       "0.114285714285714"
      ],
      "text/plain": [
       "[1] 0.1142857"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Cross-validated fit with equal priors: with CV = TRUE, lda() returns per-row\n",
    "# `class` and `posterior` components instead of a fitted model object.\n",
    "ld_b_2 <- lda(group ~ x1 + x2 + x3 + x4 + x5 + x6 + x7, data = data_frame, prior = c(1, 1) / 2, CV = TRUE)\n",
    "table(group = data_frame$group, ld_b_2$class)\n",
    "# Cross-validated misclassification rate.\n",
    "# The misclassified rows are taken from the CV results themselves; slicing\n",
    "# pred_table_2 here would pair them with the resubstitution class/posterior.\n",
    "cv_table_2 <- data.frame(class = ld_b_2$class, posterior = ld_b_2$posterior, data.group = data_frame$group)\n",
    "error22 <- cv_table_2[cv_table_2$class != cv_table_2$data.group, ]\n",
    "nrow(error22) / nrow(cv_table_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "4.1.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
